{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<a href=\"https://colab.research.google.com/github/run-llama/llama_index/blob/main/docs/examples/data_connectors/simple_directory_reader.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Simple Directory Reader"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The `SimpleDirectoryReader` is the most commonly used data connector that _just works_.  \n",
    "Simply pass in a input directory or a list of files.  \n",
    "It will select the best file reader based on the file extensions.  "
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Get Started"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If you're opening this Notebook on colab, you will probably need to install LlamaIndex 🦙."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: llama-index in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (0.10.11)\n",
      "Requirement already satisfied: llama-index-agent-openai<0.2.0,>=0.1.4 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index) (0.1.4)\n",
      "Requirement already satisfied: llama-index-cli<0.2.0,>=0.1.2 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index) (0.1.3)\n",
      "Requirement already satisfied: llama-index-core<0.11.0,>=0.10.11.post1 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index) (0.10.11.post1)\n",
      "Requirement already satisfied: llama-index-embeddings-openai<0.2.0,>=0.1.5 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index) (0.1.5)\n",
      "Requirement already satisfied: llama-index-indices-managed-llama-cloud<0.2.0,>=0.1.2 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index) (0.1.2)\n",
      "Requirement already satisfied: llama-index-legacy<0.10.0,>=0.9.48 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index) (0.9.48)\n",
      "Requirement already satisfied: llama-index-llms-openai<0.2.0,>=0.1.5 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index) (0.1.5)\n",
      "Requirement already satisfied: llama-index-multi-modal-llms-openai<0.2.0,>=0.1.3 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index) (0.1.3)\n",
      "Requirement already satisfied: llama-index-program-openai<0.2.0,>=0.1.3 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index) (0.1.3)\n",
      "Requirement already satisfied: llama-index-question-gen-openai<0.2.0,>=0.1.2 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index) (0.1.2)\n",
      "Requirement already satisfied: llama-index-readers-file<0.2.0,>=0.1.4 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index) (0.1.4)\n",
      "Requirement already satisfied: llama-index-readers-llama-parse<0.2.0,>=0.1.2 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index) (0.1.2)\n",
      "Requirement already satisfied: llama-index-vector-stores-chroma<0.2.0,>=0.1.1 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index-cli<0.2.0,>=0.1.2->llama-index) (0.1.2)\n",
      "Requirement already satisfied: PyYAML>=6.0.1 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (6.0.1)\n",
      "Requirement already satisfied: SQLAlchemy>=1.4.49 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from SQLAlchemy[asyncio]>=1.4.49->llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (2.0.27)\n",
      "Requirement already satisfied: aiohttp<4.0.0,>=3.8.6 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (3.9.3)\n",
      "Requirement already satisfied: dataclasses-json in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (0.6.4)\n",
      "Requirement already satisfied: deprecated>=1.2.9.3 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (1.2.14)\n",
      "Requirement already satisfied: dirtyjson<2.0.0,>=1.0.8 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (1.0.8)\n",
      "Requirement already satisfied: fsspec>=2023.5.0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (2024.2.0)\n",
      "Requirement already satisfied: httpx in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (0.27.0)\n",
      "Requirement already satisfied: llamaindex-py-client<0.2.0,>=0.1.13 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (0.1.13)\n",
      "Requirement already satisfied: nest-asyncio<2.0.0,>=1.5.8 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (1.6.0)\n",
      "Requirement already satisfied: networkx>=3.0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (3.1)\n",
      "Requirement already satisfied: nltk<4.0.0,>=3.8.1 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (3.8.1)\n",
      "Requirement already satisfied: numpy in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (1.24.4)\n",
      "Requirement already satisfied: openai>=1.1.0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (1.12.0)\n",
      "Requirement already satisfied: pandas in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (2.0.3)\n",
      "Requirement already satisfied: pillow>=9.0.0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (10.2.0)\n",
      "Requirement already satisfied: requests>=2.31.0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (2.31.0)\n",
      "Requirement already satisfied: tenacity<9.0.0,>=8.2.0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (8.2.3)\n",
      "Requirement already satisfied: tiktoken>=0.3.3 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (0.6.0)\n",
      "Requirement already satisfied: tqdm<5.0.0,>=4.66.1 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (4.66.2)\n",
      "Requirement already satisfied: typing-extensions>=4.5.0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (4.9.0)\n",
      "Requirement already satisfied: typing-inspect>=0.8.0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (0.9.0)\n",
      "Requirement already satisfied: beautifulsoup4<5.0.0,>=4.12.3 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index-readers-file<0.2.0,>=0.1.4->llama-index) (4.12.3)\n",
      "Requirement already satisfied: bs4<0.0.3,>=0.0.2 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index-readers-file<0.2.0,>=0.1.4->llama-index) (0.0.2)\n",
      "Requirement already satisfied: pymupdf<2.0.0,>=1.23.21 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index-readers-file<0.2.0,>=0.1.4->llama-index) (1.23.25)\n",
      "Requirement already satisfied: pypdf<5.0.0,>=4.0.1 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index-readers-file<0.2.0,>=0.1.4->llama-index) (4.0.2)\n",
      "Requirement already satisfied: llama-parse<0.4.0,>=0.3.3 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index-readers-llama-parse<0.2.0,>=0.1.2->llama-index) (0.3.4)\n",
      "Requirement already satisfied: aiosignal>=1.1.2 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (1.3.1)\n",
      "Requirement already satisfied: attrs>=17.3.0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (23.2.0)\n",
      "Requirement already satisfied: frozenlist>=1.1.1 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (1.4.1)\n",
      "Requirement already satisfied: multidict<7.0,>=4.5 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (6.0.5)\n",
      "Requirement already satisfied: yarl<2.0,>=1.0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (1.9.4)\n",
      "Requirement already satisfied: soupsieve>1.2 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from beautifulsoup4<5.0.0,>=4.12.3->llama-index-readers-file<0.2.0,>=0.1.4->llama-index) (2.5)\n",
      "Requirement already satisfied: wrapt<2,>=1.10 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from deprecated>=1.2.9.3->llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (1.16.0)\n",
      "Requirement already satisfied: chromadb<0.5.0,>=0.4.22 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (0.4.22)\n",
      "Requirement already satisfied: onnxruntime<2.0.0,>=1.17.0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (1.17.0)\n",
      "Requirement already satisfied: tokenizers<0.16.0,>=0.15.1 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (0.15.2)\n",
      "Requirement already satisfied: pydantic>=1.10 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from llamaindex-py-client<0.2.0,>=0.1.13->llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (1.10.14)\n",
      "Requirement already satisfied: anyio in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from httpx->llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (4.3.0)\n",
      "Requirement already satisfied: certifi in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from httpx->llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (2024.2.2)\n",
      "Requirement already satisfied: httpcore==1.* in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from httpx->llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (1.0.4)\n",
      "Requirement already satisfied: idna in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from httpx->llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (3.6)\n",
      "Requirement already satisfied: sniffio in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from httpx->llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (1.3.0)\n",
      "Requirement already satisfied: h11<0.15,>=0.13 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from httpcore==1.*->httpx->llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (0.14.0)\n",
      "Requirement already satisfied: click in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from nltk<4.0.0,>=3.8.1->llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (8.1.7)\n",
      "Requirement already satisfied: joblib in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from nltk<4.0.0,>=3.8.1->llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (1.3.2)\n",
      "Requirement already satisfied: regex>=2021.8.3 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from nltk<4.0.0,>=3.8.1->llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (2023.12.25)\n",
      "Requirement already satisfied: distro<2,>=1.7.0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from openai>=1.1.0->llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (1.9.0)\n",
      "Requirement already satisfied: PyMuPDFb==1.23.22 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from pymupdf<2.0.0,>=1.23.21->llama-index-readers-file<0.2.0,>=0.1.4->llama-index) (1.23.22)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from requests>=2.31.0->llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (3.3.2)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from requests>=2.31.0->llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (2.0.7)\n",
      "Requirement already satisfied: greenlet!=0.4.17 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from SQLAlchemy[asyncio]>=1.4.49->llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (3.0.3)\n",
      "Requirement already satisfied: mypy-extensions>=0.3.0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from typing-inspect>=0.8.0->llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (1.0.0)\n",
      "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from dataclasses-json->llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (3.20.2)\n",
      "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from pandas->llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (2.8.2)\n",
      "Requirement already satisfied: pytz>=2020.1 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from pandas->llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (2024.1)\n",
      "Requirement already satisfied: tzdata>=2022.1 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from pandas->llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (2024.1)\n",
      "Requirement already satisfied: build>=1.0.3 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (1.0.3)\n",
      "Requirement already satisfied: chroma-hnswlib==0.7.3 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (0.7.3)\n",
      "Requirement already satisfied: fastapi>=0.95.2 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (0.109.2)\n",
      "Requirement already satisfied: uvicorn>=0.18.3 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from uvicorn[standard]>=0.18.3->chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (0.27.1)\n",
      "Requirement already satisfied: posthog>=2.4.0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (3.4.2)\n",
      "Requirement already satisfied: pulsar-client>=3.1.0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (3.4.0)\n",
      "Requirement already satisfied: opentelemetry-api>=1.2.0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (1.22.0)\n",
      "Requirement already satisfied: opentelemetry-exporter-otlp-proto-grpc>=1.2.0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (1.22.0)\n",
      "Requirement already satisfied: opentelemetry-instrumentation-fastapi>=0.41b0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (0.43b0)\n",
      "Requirement already satisfied: opentelemetry-sdk>=1.2.0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (1.22.0)\n",
      "Requirement already satisfied: pypika>=0.48.9 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (0.48.9)\n",
      "Requirement already satisfied: overrides>=7.3.1 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (7.7.0)\n",
      "Requirement already satisfied: importlib-resources in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (6.1.1)\n",
      "Requirement already satisfied: grpcio>=1.58.0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (1.60.1)\n",
      "Requirement already satisfied: bcrypt>=4.0.1 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (4.1.2)\n",
      "Requirement already satisfied: typer>=0.9.0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (0.9.0)\n",
      "Requirement already satisfied: kubernetes>=28.1.0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (29.0.0)\n",
      "Requirement already satisfied: mmh3>=4.0.1 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (4.1.0)\n",
      "Requirement already satisfied: packaging>=17.0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from marshmallow<4.0.0,>=3.18.0->dataclasses-json->llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (23.2)\n",
      "Requirement already satisfied: coloredlogs in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from onnxruntime<2.0.0,>=1.17.0->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (15.0.1)\n",
      "Requirement already satisfied: flatbuffers in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from onnxruntime<2.0.0,>=1.17.0->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (23.5.26)\n",
      "Requirement already satisfied: protobuf in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from onnxruntime<2.0.0,>=1.17.0->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (4.25.3)\n",
      "Requirement already satisfied: sympy in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from onnxruntime<2.0.0,>=1.17.0->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (1.12)\n",
      "Requirement already satisfied: six>=1.5 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from python-dateutil>=2.8.2->pandas->llama-index-core<0.11.0,>=0.10.11.post1->llama-index) (1.16.0)\n",
      "Requirement already satisfied: huggingface_hub<1.0,>=0.16.4 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from tokenizers<0.16.0,>=0.15.1->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (0.20.3)\n",
      "Requirement already satisfied: pyproject_hooks in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from build>=1.0.3->chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (1.0.0)\n",
      "Requirement already satisfied: starlette<0.37.0,>=0.36.3 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from fastapi>=0.95.2->chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (0.36.3)\n",
      "Requirement already satisfied: filelock in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from huggingface_hub<1.0,>=0.16.4->tokenizers<0.16.0,>=0.15.1->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (3.13.1)\n",
      "Requirement already satisfied: google-auth>=1.0.1 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from kubernetes>=28.1.0->chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (2.28.0)\n",
      "Requirement already satisfied: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from kubernetes>=28.1.0->chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (1.7.0)\n",
      "Requirement already satisfied: requests-oauthlib in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from kubernetes>=28.1.0->chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (1.3.1)\n",
      "Requirement already satisfied: oauthlib>=3.2.2 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from kubernetes>=28.1.0->chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (3.2.2)\n",
      "Requirement already satisfied: importlib-metadata<7.0,>=6.0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from opentelemetry-api>=1.2.0->chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (6.11.0)\n",
      "Requirement already satisfied: backoff<3.0.0,>=1.10.0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (2.2.1)\n",
      "Requirement already satisfied: googleapis-common-protos~=1.52 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (1.62.0)\n",
      "Requirement already satisfied: opentelemetry-exporter-otlp-proto-common==1.22.0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (1.22.0)\n",
      "Requirement already satisfied: opentelemetry-proto==1.22.0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (1.22.0)\n",
      "Requirement already satisfied: opentelemetry-instrumentation-asgi==0.43b0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (0.43b0)\n",
      "Requirement already satisfied: opentelemetry-instrumentation==0.43b0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (0.43b0)\n",
      "Requirement already satisfied: opentelemetry-semantic-conventions==0.43b0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (0.43b0)\n",
      "Requirement already satisfied: opentelemetry-util-http==0.43b0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (0.43b0)\n",
      "Requirement already satisfied: setuptools>=16.0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from opentelemetry-instrumentation==0.43b0->opentelemetry-instrumentation-fastapi>=0.41b0->chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (69.1.0)\n",
      "Requirement already satisfied: asgiref~=3.0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from opentelemetry-instrumentation-asgi==0.43b0->opentelemetry-instrumentation-fastapi>=0.41b0->chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (3.7.2)\n",
      "Requirement already satisfied: monotonic>=1.5 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from posthog>=2.4.0->chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (1.6)\n",
      "Requirement already satisfied: httptools>=0.5.0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from uvicorn[standard]>=0.18.3->chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (0.6.1)\n",
      "Requirement already satisfied: python-dotenv>=0.13 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from uvicorn[standard]>=0.18.3->chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (1.0.1)\n",
      "Requirement already satisfied: uvloop!=0.15.0,!=0.15.1,>=0.14.0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from uvicorn[standard]>=0.18.3->chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (0.19.0)\n",
      "Requirement already satisfied: watchfiles>=0.13 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from uvicorn[standard]>=0.18.3->chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (0.21.0)\n",
      "Requirement already satisfied: websockets>=10.4 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from uvicorn[standard]>=0.18.3->chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (12.0)\n",
      "Requirement already satisfied: humanfriendly>=9.1 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from coloredlogs->onnxruntime<2.0.0,>=1.17.0->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (10.0)\n",
      "Requirement already satisfied: mpmath>=0.19 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from sympy->onnxruntime<2.0.0,>=1.17.0->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (1.3.0)\n",
      "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (5.3.2)\n",
      "Requirement already satisfied: pyasn1-modules>=0.2.1 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (0.3.0)\n",
      "Requirement already satisfied: rsa<5,>=3.1.4 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (4.9)\n",
      "Requirement already satisfied: zipp>=0.5 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from importlib-metadata<7.0,>=6.0->opentelemetry-api>=1.2.0->chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (3.17.0)\n",
      "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /Users/sourabhdesai/Library/Caches/pypoetry/virtualenvs/llama-index-cXQhuK8v-py3.11/lib/python3.11/site-packages (from pyasn1-modules>=0.2.1->google-auth>=1.0.1->kubernetes>=28.1.0->chromadb<0.5.0,>=0.4.22->llama-index-vector-stores-chroma<0.2.0,>=0.1.1->llama-index-cli<0.2.0,>=0.1.2->llama-index) (0.5.1)\n",
      "\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "!pip install llama-index"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Download Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--2024-02-24 19:30:28--  https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt\n",
      "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 2606:50c0:8000::154, 2606:50c0:8003::154, 2606:50c0:8002::154, ...\n",
      "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|2606:50c0:8000::154|:443... connected.\n",
      "HTTP request sent, awaiting response... 200 OK\n",
      "Length: 75042 (73K) [text/plain]\n",
      "Saving to: ‘data/paul_graham/paul_graham_essay1.txt’\n",
      "\n",
      "data/paul_graham/pa 100%[===================>]  73.28K  --.-KB/s    in 0.05s   \n",
      "\n",
      "2024-02-24 19:30:29 (1.42 MB/s) - ‘data/paul_graham/paul_graham_essay1.txt’ saved [75042/75042]\n",
      "\n",
      "--2024-02-24 19:30:29--  https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt\n",
      "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 2606:50c0:8000::154, 2606:50c0:8003::154, 2606:50c0:8002::154, ...\n",
      "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|2606:50c0:8000::154|:443... connected.\n",
      "HTTP request sent, awaiting response... 200 OK\n",
      "Length: 75042 (73K) [text/plain]\n",
      "Saving to: ‘data/paul_graham/paul_graham_essay2.txt’\n",
      "\n",
      "data/paul_graham/pa 100%[===================>]  73.28K  --.-KB/s    in 0.04s   \n",
      "\n",
      "2024-02-24 19:30:29 (1.99 MB/s) - ‘data/paul_graham/paul_graham_essay2.txt’ saved [75042/75042]\n",
      "\n",
      "--2024-02-24 19:30:29--  https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt\n",
      "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 2606:50c0:8000::154, 2606:50c0:8003::154, 2606:50c0:8002::154, ...\n",
      "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|2606:50c0:8000::154|:443... connected.\n",
      "HTTP request sent, awaiting response... 200 OK\n",
      "Length: 75042 (73K) [text/plain]\n",
      "Saving to: ‘data/paul_graham/paul_graham_essay3.txt’\n",
      "\n",
      "data/paul_graham/pa 100%[===================>]  73.28K  --.-KB/s    in 0.03s   \n",
      "\n",
      "2024-02-24 19:30:29 (2.55 MB/s) - ‘data/paul_graham/paul_graham_essay3.txt’ saved [75042/75042]\n",
      "\n"
     ]
    }
   ],
   "source": [
    "!mkdir -p 'data/paul_graham/'\n",
    "!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay1.txt'\n",
    "!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay2.txt'\n",
    "!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay3.txt'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.core import SimpleDirectoryReader"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Load specific files "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "reader = SimpleDirectoryReader(\n",
    "    input_files=[\"./data/paul_graham/paul_graham_essay1.txt\"]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loaded 1 docs\n"
     ]
    }
   ],
   "source": [
    "docs = reader.load_data()\n",
    "print(f\"Loaded {len(docs)} docs\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Load all (top-level) files from directory"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "reader = SimpleDirectoryReader(input_dir=\"./data/paul_graham/\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loaded 3 docs\n"
     ]
    }
   ],
   "source": [
    "docs = reader.load_data()\n",
    "print(f\"Loaded {len(docs)} docs\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Load all (recursive) files from directory "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!mkdir -p 'data/paul_graham/nested'\n",
    "!echo \"This is a nested file\" > 'data/paul_graham/nested/nested_file.md'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# only load markdown files\n",
    "required_exts = [\".md\"]\n",
    "\n",
    "reader = SimpleDirectoryReader(\n",
    "    input_dir=\"./data\",\n",
    "    required_exts=required_exts,\n",
    "    recursive=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loaded 1 docs\n"
     ]
    }
   ],
   "source": [
    "docs = reader.load_data()\n",
    "print(f\"Loaded {len(docs)} docs\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create an iterator to load files and process them as they load"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "4\n"
     ]
    }
   ],
   "source": [
    "reader = SimpleDirectoryReader(\n",
    "    input_dir=\"./data\",\n",
    "    recursive=True,\n",
    ")\n",
    "\n",
    "all_docs = []\n",
    "for docs in reader.iter_data():\n",
    "    for doc in docs:\n",
    "        # do something with the doc\n",
    "        doc.text = doc.text.upper()\n",
    "        all_docs.append(doc)\n",
    "\n",
    "print(len(all_docs))"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Full Configuration"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This is the full list of arguments that can be passed to the `SimpleDirectoryReader`:\n",
    "\n",
    "```python\n",
    "class SimpleDirectoryReader(BaseReader):\n",
    "    \"\"\"Simple directory reader.\n",
    "\n",
    "    Load files from file directory. \n",
    "    Automatically select the best file reader given file extensions.\n",
    "\n",
    "\n",
    "    Args:\n",
    "        input_dir (str): Path to the directory.\n",
    "        input_files (List): List of file paths to read\n",
    "            (Optional; overrides input_dir, exclude)\n",
    "        exclude (List): glob of python file paths to exclude (Optional)\n",
    "        exclude_hidden (bool): Whether to exclude hidden files (dotfiles).\n",
    "        encoding (str): Encoding of the files.\n",
    "            Default is utf-8.\n",
    "        errors (str): how encoding and decoding errors are to be handled,\n",
    "                see https://docs.python.org/3/library/functions.html#open\n",
    "        recursive (bool): Whether to recursively search in subdirectories.\n",
    "            False by default.\n",
    "        filename_as_id (bool): Whether to use the filename as the document id.\n",
    "            False by default.\n",
    "        required_exts (Optional[List[str]]): List of required extensions.\n",
    "            Default is None.\n",
    "        file_extractor (Optional[Dict[str, BaseReader]]): A mapping of file\n",
    "            extension to a BaseReader class that specifies how to convert that file\n",
    "            to text. If not specified, use default from DEFAULT_FILE_READER_CLS.\n",
    "        num_files_limit (Optional[int]): Maximum number of files to read.\n",
    "            Default is None.\n",
    "        file_metadata (Optional[Callable[str, Dict]]): A function that takes\n",
    "            in a filename and returns a Dict of metadata for the Document.\n",
    "            Default is None.\n",
    "\"\"\"\n",
    "```\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "llama",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
